clear 
est clear

**********************************************************************
* Attrition 
**********************************************************************
* Loads the cleaned panel, prints attrition and assignment tabulations
* by round, then keeps only the baseline round (round==0).

*** Prep data  *********************

* -use- accepts -clear- (not -replace-) to discard data in memory;
* forward slash keeps the path portable across operating systems
use "${data}/SMU_CleanData_Panel_Public", clear
sort cr_id location_branch location_club round

tab round 

* treatment values, attritors have missing treatment assignment
tab att_rr round, m 
tab att_midline round, m
tab att_endline round, m
tab round randomization, m 

* work with baseline data since it has all we need
tab round randomization
keep if round==0 


*** create baseline controls *********************
*** Age 
summarize CR_age, detail
* rebuild the short-named copy: discard the existing age, then clone CR_age 
drop age 
generate age = CR_age 

*** ever pregnant 
fre s_eversex
fre f_everpreg
tabulate f_everpreg s_eversex, missing 
* coding rule, applied in a single pass:
*	1 when f_everpreg==1
*	0 when f_everpreg==0 or s_eversex==0 (never had sex)
*	missing in every other case
generate everpregnant = cond(f_everpreg==1, 1, cond(f_everpreg==0 | s_eversex==0, 0, .))
* is missing in 51 cases, due to either the sex or the pregnancy question being missing 
tabulate everpregnant
mdesc everpregnant

*** ever married
*	count `as if' married as married
*	means divorced/separated/widowed are not married... 
*	codes 1-6 are set to 1, other non-negative codes to 0;
*	negative or missing codes stay missing
fre ms_currentlymarried
generate evermarried = inlist(ms_currentlymarried,1,2,3,4,5,6) if ms_currentlymarried>=0 & !missing(ms_currentlymarried)
tabulate ms_currentlymarried evermarried, missing


*** list of controls 
local controls age everpregnant evermarried PPI_score phq8_score 
summarize `controls'
mdesc `controls' 

*** for missings, replace with median and create dummy indicating missingness 
* only the two constructed indicators are imputed in this loop
foreach x of varlist everpregnant evermarried {
	display "var: `x'"
	* flag the observations that are missing before imputation 
	generate mis_`x' = missing(`x')
	tabulate mis_`x'
	* fill the flagged observations with the median of the observed values
	quietly summarize `x' , detail
	replace `x' = r(p50) if mis_`x'==1 
	* check 
	tabulate `x' mis_`x'
}

*** Center controls 
* subtract the (unweighted) sample mean from every control
foreach x of local controls {
	quietly summarize `x', meanonly
	replace `x' = `x'-r(mean)
}
summarize `controls'

*** Block FE's 
* one indicator per value of block (blocks1, blocks2, ...), each then demeaned
tabulate block, generate(blocks) 
foreach fe of varlist blocks* {
	summarize `fe' 
	replace `fe' = `fe' - r(mean)
}
summarize blocks*
 
*** Merge in sampling weights 
* Builds one weight (w_*) and one in-sample flag (insample_*) per wave.
* 10 CRs missing from sampling weights file... hence the assert(match master)
merge 1:1 cr_id using "${data}/PhaseII_status_weights.dta", assert(match master) nogen // keepusing(sample_weight phase_II_sample)
tab sample_weight
*create a weight for each phase, set unweighted to 1 
gen w_rr = 1
gen w_mid = 1
gen w_end = sample_weight
* observations without a sampling weight get weight 1
replace w_end = 1 if missing(sample_weight)
label var w_rr  "Weights for RR, all equal to 1"
* w_mid is the midline weight, so label it ML rather than BL
label var w_mid "Weights for ML, all equal to 1"
label var w_end "Weights for EL, PhaseII selection probabilities"
drop sample_weight
* create an insample variable
gen insample_rr  = 1
gen insample_mid = 1
gen insample_end = 1
* fix wrong weights at end 
replace w_end = 1 if w_end == 2
* remove unsampled individuals from w3 
* (neither in the phone nor in the field Phase II sample)
replace w_end = . if phone_sample_PhaseII == 0 & field_sample_PhaseII == 0
replace insample_end = 0 if phone_sample_PhaseII == 0 & field_sample_PhaseII == 0
 
*** Fix up labels 
label var PPI_score 		"PPI"
label var age 				"Age"
label var everpregnant 		"Ever pregnant"
label var evermarried 		"Ever married"
label var phq8_score 		"PHQ-8"

* label treatment nicely 	
label define randomization 0 "Control" 1 "IPT-G" 2 "IPT-G+", modify
label values randomization randomization
tab randomization
rename randomization treat 
tab treat 
 
* dummy for any treatment 
* guard against missing assignment: in Stata missing != 0 is true, so without
* the if-condition a missing treat would be coded as treated
gen treat_any = treat!=0 if !missing(treat)
label var treat_any "IPT-G (Any)"
tab treat_any treat, m 

* labels for outcome
label var att_rr "RR"
label var att_midline "Midline"
label var att_endline "Endline"

* doublecheck the vars we'll use ...
* (treat is listed explicitly because treat_* only matches treat_any)
mdesc att_* treat treat_* block location_club 
distinct block location_club


*** Regressions *********************

*** Combined treatment 
* For each wave (rr, mid, end) four models are stored with eststo:
*	att_<wave>_any		any-treatment dummy, block FE only
*	att_<wave>			treatment arms, block FE only
*	att_<wave>_any_adj	any-treatment dummy, controls and block FE
*	att_<wave>_adj		treatment arms, controls and block FE
* Every model is fully interacted with the treatment term, uses the wave
* weight w_<wave>, and is restricted to insample_<wave>==1.
* NOTE(review): att_mid and att_end presumably resolve to att_midline and
* att_endline through variable-name abbreviation - confirm.
* NOTE(review): SEs are clustered on club, while the checks earlier in this
* file use location_club - confirm that club is the intended cluster id.
local waves rr mid end 

local covars age everpregnant evermarried PPI_score phq8_score 

foreach w of local waves {

	* clauses shared by all four models of this wave
	local wgt   "[pw=w_`w']"
	local smp   "if insample_`w'==1"
	local clust "vce(cluster club)"

	*** Unadjusted *******************
	* Linear Model: treatment, Block FE, Cluster SEs at Club level
	eststo att_`w'_any: regress att_`w' b0.treat_any##c.(blocks*) `wgt' `smp', `clust'
		* weighted mean of the outcome in the control arm
		quietly summarize att_`w' [aw=w_`w'] `smp' & treat_any==0
		estadd scalar c_mean = r(mean)
		* p-value of the any-treatment coefficient
		test 1.treat_any == 0
		estadd scalar pval_any = r(p)
		* flag: no covariates in this model
		estadd local ctrls "No"

	* Linear Model: treatment, Block FE, Cluster SEs at Club level
	eststo att_`w': regress att_`w' b0.treat##c.(blocks*) `wgt' `smp', `clust'
		* weighted mean of the outcome in the control arm
		quietly summarize att_`w' [aw=w_`w'] `smp' & treat_any==0
		estadd scalar c_mean = r(mean)
		* p-values: each arm against zero, equality of arms, both arms jointly
		test 1.treat == 0
		estadd scalar pval_ipt = r(p)
		test 2.treat == 0
		estadd scalar pval_iptplus = r(p)
		test 1.treat == 2.treat
		estadd scalar pval_eq = r(p)
		testparm 1.treat 2.treat
		estadd scalar pval_both = r(p)
		* flag: no covariates in this model
		estadd local ctrls "No"

	*** Adjusted *******************
	* Treat Any
	* Linear Model: treatment fully interacted, controls, Block FE, Cluster SEs at Club level
	eststo att_`w'_any_adj: regress att_`w' c.(`covars' blocks*)##b0.treat_any `wgt' `smp', `clust'
		* weighted mean of the outcome in the control arm
		quietly summarize att_`w' [aw=w_`w'] `smp' & treat_any==0
		estadd scalar c_mean = r(mean)
		* p-value of the any-treatment coefficient
		test 1.treat_any == 0
		estadd scalar pval_any = r(p)
		* joint tests: covariate-by-treatment interactions, then covariate main effects
		testparm c.(`covars')#1.treat_any
		estadd scalar ftest_pval_any = r(p)
		testparm c.(`covars')
		estadd scalar ftest_pval_ctrls = r(p)
		* flag: covariates included
		estadd local ctrls "Yes"

	* Linear Model: treatment fully interacted, controls, Block FE, Cluster SEs at Club level
	eststo att_`w'_adj: regress att_`w' c.(`covars' blocks*)##b0.treat `wgt' `smp', `clust'
		* weighted mean of the outcome in the control arm
		quietly summarize att_`w' [aw=w_`w'] `smp' & treat_any==0
		estadd scalar c_mean = r(mean)
		* p-values: each arm against zero, equality of arms, both arms jointly
		test 1.treat == 0
		estadd scalar pval_ipt = r(p)
		test 2.treat == 0
		estadd scalar pval_iptplus = r(p)
		test 1.treat == 2.treat
		estadd scalar pval_eq = r(p)
		testparm 1.treat 2.treat
		estadd scalar pval_both = r(p)
		* joint tests: interactions with each arm, then covariate main effects
		testparm c.(`covars')#1.treat
		estadd scalar ftest_pval_ipt = r(p)
		testparm c.(`covars')#2.treat
		estadd scalar ftest_pval_iptplus = r(p)
		testparm c.(`covars')
		estadd scalar ftest_pval_ctrls = r(p)
		* flag: covariates included
		estadd local ctrls "Yes"
}

* Main text version 
* Writes attrition.tex: the any-treatment RR models plus the arm-level models
* for RR, midline and endline; only the treatment coefficients are kept.
* Written with line continuations, so the command delimiter stays at cr.
esttab att_rr_any att_rr_any_adj ///
		att_rr att_rr_adj ///
		att_mid att_mid_adj ///
		att_end att_end_adj ///
	using "${tables}/attrition.tex", replace booktabs ///
	label b(3) se(3) nostar nonote nobaselev nogaps frag nomtitle nonumber ///
	keep(1.treat_any 1.treat 2.treat) ///
	stats(c_mean N ctrls ///
		  pval_any pval_ipt pval_iptplus pval_eq ///
		  ftest_pval_ctrls ftest_pval_any ftest_pval_ipt ftest_pval_iptplus , fmt(3 0 3 3 3 3 3 3 3 3) ///
	label("Control mean" ///
		  "Observations" ///
		  "Covariates" ///
		  "H0: IPT-G (any)=0" ///
		  "H0: IPT-G=0" ///
		  "H0: IPT-G+=0" ///
		  "H0: IPT-G=IPT-G+" ///
		  "H0: Controls=0" ///
		  "H0: IPT-G (any) int.=0" ///
		  "H0: IPT-G int.=0" ///
		  "H0: IPT-G+ int.=0"))

* Appendix version with covariate coefficients shown
* exclude the any... att_rr_any  att_rr_any_adj
* Writes attrition_blcovars.tex; the constant and all block terms are dropped.
* nostar is given once, and fmt() lists exactly one format per statistic.
#delimit ;
esttab 	
		att_rr   	att_rr_adj  
		att_mid  	att_mid_adj 
		att_end  	att_end_adj  
	using "${tables}/attrition_blcovars.tex", replace booktabs
	label b(3) se(3) nostar nonote nobaselev nogaps frag nomtitle nonumber
	drop(_cons *blocks*) interaction(" X ") 
	stats(c_mean N 
		  pval_ipt pval_iptplus pval_eq  
		  ftest_pval_ctrls ftest_pval_ipt ftest_pval_iptplus , fmt(3 0 3 3 3 3 3 3)  
	label("Control mean" 
		  "Observations"
		  "H0: IPT-G=0" 
		  "H0: IPT-G+=0" 
		  "H0: IPT-G = IPT-G+" 
  		  "H0: Controls=0"
		  "H0: IPT-G interactions=0" 
		  "H0: IPT-G+ interactions=0")) 
	;
#delimit cr

